##### 0.1 Preliminaries: setting working directory #######################################################################
## define working directory on each computer
wd_laptop <- "/Users/mark.hebblewhite/Dropbox/WILD 562/Spring2017/lab7/"
wd_desktop <- "/Users/mark.hebblewhite/Documents/Teaching/UofMcourses/WILD562/Spring2017/Labs/lab7/"
## automatically set working directory depending which computer you're on
ifelse(file.exists(wd_laptop), setwd(wd_laptop), setwd(wd_desktop))
## [1] "/Users/mark.hebblewhite/Dropbox/WILD 562/Spring2017/Lab7"
getwd()
## [1] "/Users/mark.hebblewhite/Dropbox/WILD 562/Spring2017/Lab7"
list.files() ## handy command to see what is inside the working directory
## [1] "figures"
## [2] "lab7_elk_migrant.csv"
## [3] "Lab7.Rmd"
## [4] "Lab7code.R"
## [5] "lab8_elk_migrant_used.csv"
## [6] "Lecture_15_MixedModels_.ppt"
## [7] "R-sig-mixed-models Digest, Vol 59, Issue 18.pdf"
## [8] "TREE_Bolker_GLMM_Ecology_2009.pdf"
## [9] "WILD562-Lab7_Mixed-effects RSFs_final.docx"
## [10] "wolfkde.csv"
wolfkde2 <- read.csv("wolfkde.csv", header=TRUE, sep = ",", na.strings="NA", dec=".")
wolfkde3 <-na.omit(wolfkde2)
wolfkde3$usedFactor <-as.factor(wolfkde3$usedFactor)
head(wolfkde3)
## deer_w2 moose_w2 elk_w2 sheep_w2 goat_w2 wolf_w2 Elevation2
## 1 4 5 5 3 3 5 1766.146
## 2 4 4 4 1 3 4 1788.780
## 3 4 5 5 4 1 5 1765.100
## 4 4 5 5 4 1 5 1742.913
## 6 1 1 1 1 4 1 1778.360
## 7 4 5 5 4 1 5 1764.313
## DistFromHumanAccess2 DistFromHighHumanAccess2 landcover16 EASTING
## 1 427.3962 9367.8168 8 580840
## 2 360.5043 10398.5999 2 580000
## 3 283.6648 10296.5167 2 579800
## 4 167.4134 6347.8193 2 583803
## 6 622.6257 723.7941 13 588573
## 7 373.2101 9331.2403 2 580785
## NORTHING pack used usedFactor habitatType landcov.f
## 1 5724800 Red Deer 1 1 Shrub Shrub
## 2 5724195 Red Deer 1 1 Moderate Conifer Moderate Conifer
## 3 5724800 Red Deer 1 1 Moderate Conifer Moderate Conifer
## 4 5725654 Red Deer 1 1 Moderate Conifer Moderate Conifer
## 6 5728804 Red Deer 1 1 Burn-Grassland Burn
## 7 5724966 Red Deer 1 1 Moderate Conifer Moderate Conifer
## closedConif modConif openConif decid regen mixed herb shrub water
## 1 0 0 0 0 0 0 0 1 0
## 2 0 1 0 0 0 0 0 0 0
## 3 0 1 0 0 0 0 0 0 0
## 4 0 1 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 7 0 1 0 0 0 0 0 0 0
## rockIce burn alpineHerb alpineShrub alpine
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 6 0 1 0 0 0
## 7 0 0 0 0 0
table(wolfkde2$pack, wolfkde2$used)
##
## 0 1
## Bow Valley 1000 320
## Red Deer 996 93
## unbalanced sample sizes between packs
top.env <- glm(used ~ Elevation2 + DistFromHighHumanAccess2 + openConif+modConif+closedConif+mixed+herb+shrub+water+burn, family=binomial(logit), data=wolfkde3)
summary(top.env)
##
## Call:
## glm(formula = used ~ Elevation2 + DistFromHighHumanAccess2 +
## openConif + modConif + closedConif + mixed + herb + shrub +
## water + burn, family = binomial(logit), data = wolfkde3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0290 -0.5020 -0.1576 -0.0366 3.2732
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 9.570e+00 8.805e-01 10.869 < 2e-16 ***
## Elevation2 -6.782e-03 4.883e-04 -13.888 < 2e-16 ***
## DistFromHighHumanAccess2 1.867e-04 3.511e-05 5.317 1.05e-07 ***
## openConif 8.457e-01 4.404e-01 1.920 0.0548 .
## modConif -1.716e-02 3.836e-01 -0.045 0.9643
## closedConif -1.126e-01 3.944e-01 -0.286 0.7752
## mixed 1.325e+00 5.435e-01 2.438 0.0148 *
## herb 8.564e-01 5.525e-01 1.550 0.1212
## shrub 5.781e-01 4.486e-01 1.289 0.1974
## water 8.559e-01 6.389e-01 1.340 0.1804
## burn 1.861e+00 4.629e-01 4.021 5.80e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2040.9 on 2117 degrees of freedom
## Residual deviance: 1298.3 on 2107 degrees of freedom
## AIC: 1320.3
##
## Number of Fisher Scoring iterations: 7
## but subset by packs
top.env.bv <- glm(used ~ Elevation2 + DistFromHighHumanAccess2 + openConif+modConif+closedConif+mixed+herb+shrub+water+burn, family=binomial(logit), data=wolfkde3, subset=pack== "Bow Valley")
summary(top.env.bv)
##
## Call:
## glm(formula = used ~ Elevation2 + DistFromHighHumanAccess2 +
## openConif + modConif + closedConif + mixed + herb + shrub +
## water + burn, family = binomial(logit), data = wolfkde3,
## subset = pack == "Bow Valley")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4530 -0.4758 -0.0414 -0.0003 3.3036
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 16.9380986 1.8420778 9.195 < 2e-16 ***
## Elevation2 -0.0115985 0.0012665 -9.158 < 2e-16 ***
## DistFromHighHumanAccess2 -0.0007834 0.0003641 -2.152 0.03143 *
## openConif 0.1721535 0.6187672 0.278 0.78084
## modConif -0.0932573 0.5416335 -0.172 0.86330
## closedConif 0.4211620 0.5823914 0.723 0.46958
## mixed 0.5665385 0.6535562 0.867 0.38602
## herb 0.5651405 0.7075328 0.799 0.42444
## shrub 0.2971577 0.6316810 0.470 0.63805
## water 0.7625159 0.7857238 0.970 0.33182
## burn 2.3602747 0.8249694 2.861 0.00422 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1431.29 on 1280 degrees of freedom
## Residual deviance: 829.15 on 1270 degrees of freedom
## AIC: 851.15
##
## Number of Fisher Scoring iterations: 8
## but subset by packs
top.env.rd <- glm(used ~ Elevation2 + DistFromHighHumanAccess2 + openConif+modConif+closedConif+mixed+herb+shrub+water+burn, family=binomial(logit), data=wolfkde3, subset=pack== "Red Deer")
summary(top.env.rd)
##
## Call:
## glm(formula = used ~ Elevation2 + DistFromHighHumanAccess2 +
## openConif + modConif + closedConif + mixed + herb + shrub +
## water + burn, family = binomial(logit), data = wolfkde3,
## subset = pack == "Red Deer")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.50698 -0.44662 -0.13708 -0.08183 3.05222
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.042e+00 1.748e+00 2.313 0.020741 *
## Elevation2 -3.926e-03 7.560e-04 -5.193 2.06e-07 ***
## DistFromHighHumanAccess2 5.166e-05 3.835e-05 1.347 0.177929
## openConif 2.543e+00 7.146e-01 3.558 0.000374 ***
## modConif 1.174e+00 7.075e-01 1.659 0.097072 .
## closedConif 1.305e+00 7.230e-01 1.805 0.071001 .
## mixed 3.637e+00 1.040e+00 3.497 0.000470 ***
## herb 2.080e+00 1.061e+00 1.960 0.049991 *
## shrub 2.077e+00 7.846e-01 2.647 0.008117 **
## water -1.259e+01 6.891e+02 -0.018 0.985427
## burn 2.807e+00 7.397e-01 3.795 0.000148 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 527.75 on 836 degrees of freedom
## Residual deviance: 374.43 on 826 degrees of freedom
## AIC: 396.43
##
## Number of Fisher Scoring iterations: 14
## Note lots of differnces between packs that we have already seen
## how to fit with one model?
top.env.mixed <- glmer(used~Elevation2 + DistFromHighHumanAccess2 + openConif+modConif+closedConif+mixed+herb+shrub+water+burn+(1|pack), data=wolfkde3,family=binomial(link="logit"))
summary(top.env.mixed)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## used ~ Elevation2 + DistFromHighHumanAccess2 + openConif + modConif +
## closedConif + mixed + herb + shrub + water + burn + (1 | pack)
## Data: wolfkde3
##
## AIC BIC logLik deviance df.resid
## 1308.5 1376.4 -642.3 1284.5 2106
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4613 -0.3585 -0.0978 -0.0205 11.9116
##
## Random effects:
## Groups Name Variance Std.Dev.
## pack (Intercept) 0.2994 0.5472
## Number of obs: 2118, groups: pack, 2
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.149e+01 1.116e+00 10.291 < 2e-16 ***
## Elevation2 -7.683e-03 5.713e-04 -13.449 < 2e-16 ***
## DistFromHighHumanAccess2 1.252e-04 3.856e-05 3.248 0.001163 **
## openConif 6.622e-01 4.497e-01 1.473 0.140873
## modConif -1.381e-01 3.920e-01 -0.352 0.724612
## closedConif -1.322e-01 4.030e-01 -0.328 0.742949
## mixed 1.199e+00 5.482e-01 2.187 0.028734 *
## herb 7.328e-01 5.602e-01 1.308 0.190819
## shrub 4.547e-01 4.599e-01 0.989 0.322789
## water 6.873e-01 6.407e-01 1.073 0.283396
## burn 1.631e+00 4.773e-01 3.417 0.000634 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Elvtn2 DFHHA2 opnCnf modCnf clsdCn mixed herb shrub
## Elevation2 -0.871
## DstFrmHgHA2 0.258 -0.415
## openConif -0.406 0.149 0.007
## modConif -0.481 0.185 0.030 0.803
## closedConif -0.341 0.035 0.092 0.759 0.883
## mixed -0.378 0.173 -0.005 0.580 0.677 0.633
## herb -0.362 0.162 -0.017 0.564 0.657 0.616 0.478
## shrub -0.395 0.145 -0.005 0.678 0.786 0.744 0.568 0.553
## water -0.330 0.153 0.012 0.498 0.583 0.545 0.424 0.411 0.488
## burn -0.330 0.082 0.010 0.641 0.733 0.702 0.526 0.515 0.624
## water
## Elevation2
## DstFrmHgHA2
## openConif
## modConif
## closedConif
## mixed
## herb
## shrub
## water
## burn 0.451
## fit warnings:
## Some predictor variables are on very different scales: consider rescaling
## convergence code: 0
## Model failed to converge with max|grad| = 0.912625 (tol = 0.001, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
#fit warnings:
# Some predictor variables are on very different scales: consider rescaling
#convergence code: 0
# Model failed to converge with max|grad| = 0.912625 (tol = 0.001, component 1)
#Model is nearly unidentifiable: very large eigenvalue
#- Rescale variables?
#Model is nearly unidentifiable: large eigenvalue ratio
#- Rescale variables?
#? scale ## note the defaults are to center to 0 and scale by dividing the centered x values by their standard deviation.
## The new variable has a mean = 0 and are now expressed in units +/- of standard deviations.
## This is a very common/important step in the fitting of GLMM's
## This also has the direct advantage of being able to now directly compare the coefficients of continuous covariates in terms of SDs.
## Finally, not we do not really ever scale categorical variables.
head(wolfkde3)
## deer_w2 moose_w2 elk_w2 sheep_w2 goat_w2 wolf_w2 Elevation2
## 1 4 5 5 3 3 5 1766.146
## 2 4 4 4 1 3 4 1788.780
## 3 4 5 5 4 1 5 1765.100
## 4 4 5 5 4 1 5 1742.913
## 6 1 1 1 1 4 1 1778.360
## 7 4 5 5 4 1 5 1764.313
## DistFromHumanAccess2 DistFromHighHumanAccess2 landcover16 EASTING
## 1 427.3962 9367.8168 8 580840
## 2 360.5043 10398.5999 2 580000
## 3 283.6648 10296.5167 2 579800
## 4 167.4134 6347.8193 2 583803
## 6 622.6257 723.7941 13 588573
## 7 373.2101 9331.2403 2 580785
## NORTHING pack used usedFactor habitatType landcov.f
## 1 5724800 Red Deer 1 1 Shrub Shrub
## 2 5724195 Red Deer 1 1 Moderate Conifer Moderate Conifer
## 3 5724800 Red Deer 1 1 Moderate Conifer Moderate Conifer
## 4 5725654 Red Deer 1 1 Moderate Conifer Moderate Conifer
## 6 5728804 Red Deer 1 1 Burn-Grassland Burn
## 7 5724966 Red Deer 1 1 Moderate Conifer Moderate Conifer
## closedConif modConif openConif decid regen mixed herb shrub water
## 1 0 0 0 0 0 0 0 1 0
## 2 0 1 0 0 0 0 0 0 0
## 3 0 1 0 0 0 0 0 0 0
## 4 0 1 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 7 0 1 0 0 0 0 0 0 0
## rockIce burn alpineHerb alpineShrub alpine
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 6 0 1 0 0 0
## 7 0 0 0 0 0
wolfkde3$Elevation2_sc <-scale(wolfkde3$Elevation2)
hist(wolfkde3$Elevation2)
hist(wolfkde3$Elevation2_sc)
plot(wolfkde3$Elevation2, wolfkde3$Elevation2_sc)
summary(wolfkde3$Elevation2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1401 1537 1918 1937 2259 3112
summary(wolfkde3$Elevation2_sc)
## V1
## Min. :-1.30701
## 1st Qu.:-0.97482
## Median :-0.04601
## Mean : 0.00000
## 3rd Qu.: 0.78329
## Max. : 2.86246
wolfkde3$DistFromHighHumanAccess2_sc <- scale(wolfkde3$DistFromHighHumanAccess2)
plot(wolfkde3$DistFromHighHumanAccess2_sc, wolfkde3$DistFromHighHumanAccess2)
top.env2 <- glm(used ~ Elevation2_sc + DistFromHighHumanAccess2_sc + openConif+modConif+closedConif+mixed+herb+shrub+water+burn, family=binomial(logit), data=wolfkde3)
summary(top.env2)
##
## Call:
## glm(formula = used ~ Elevation2_sc + DistFromHighHumanAccess2_sc +
## openConif + modConif + closedConif + mixed + herb + shrub +
## water + burn, family = binomial(logit), data = wolfkde3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0290 -0.5020 -0.1576 -0.0366 3.2732
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.10227 0.36760 -8.439 < 2e-16 ***
## Elevation2_sc -2.78257 0.20035 -13.888 < 2e-16 ***
## DistFromHighHumanAccess2_sc 0.64346 0.12102 5.317 1.05e-07 ***
## openConif 0.84574 0.44043 1.920 0.0548 .
## modConif -0.01716 0.38364 -0.045 0.9643
## closedConif -0.11262 0.39436 -0.286 0.7752
## mixed 1.32511 0.54351 2.438 0.0148 *
## herb 0.85637 0.55251 1.550 0.1212
## shrub 0.57814 0.44856 1.289 0.1974
## water 0.85593 0.63892 1.340 0.1804
## burn 1.86121 0.46290 4.021 5.80e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2040.9 on 2117 degrees of freedom
## Residual deviance: 1298.3 on 2107 degrees of freedom
## AIC: 1320.3
##
## Number of Fisher Scoring iterations: 7
## Now you can directly compare the coefficients of Elevation and Distance from high human access?
## Which one has a 'stronger' effect?
## Comparing the categorical landcover coefficients, which one is the strongest ? (burn)
## how to fit with one model?
top.env.mixed2 <- glmer(used~Elevation2_sc + DistFromHighHumanAccess2_sc + openConif+modConif+closedConif+mixed+herb+shrub+water+burn+(1|pack), data=wolfkde3,family=binomial(link="logit"))
summary(top.env.mixed2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: used ~ Elevation2_sc + DistFromHighHumanAccess2_sc + openConif +
## modConif + closedConif + mixed + herb + shrub + water + burn +
## (1 | pack)
## Data: wolfkde3
##
## AIC BIC logLik deviance df.resid
## 1308.5 1376.4 -642.3 1284.5 2106
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4613 -0.3585 -0.0978 -0.0205 11.9121
##
## Random effects:
## Groups Name Variance Std.Dev.
## pack (Intercept) 0.2994 0.5472
## Number of obs: 2118, groups: pack, 2
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.0829 0.5424 -5.684 1.32e-08 ***
## Elevation2_sc -3.1523 0.2337 -13.490 < 2e-16 ***
## DistFromHighHumanAccess2_sc 0.4316 0.1326 3.254 0.001137 **
## openConif 0.6622 0.4496 1.473 0.140815
## modConif -0.1381 0.3919 -0.352 0.724629
## closedConif -0.1321 0.4027 -0.328 0.742896
## mixed 1.1990 0.5481 2.187 0.028707 *
## herb 0.7329 0.5602 1.308 0.190771
## shrub 0.4548 0.4599 0.989 0.322730
## water 0.6873 0.6407 1.073 0.283347
## burn 1.6309 0.4773 3.417 0.000633 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Elvt2_ DFHHA2 opnCnf modCnf clsdCn mixed herb shrub
## Elevatn2_sc 0.170
## DstFrmHHA2_ -0.131 -0.409
## openConif -0.529 0.152 0.000
## modConif -0.605 0.189 0.023 0.802
## closedConif -0.614 0.038 0.085 0.759 0.883
## mixed -0.425 0.175 -0.009 0.580 0.677 0.633
## herb -0.416 0.164 -0.021 0.564 0.657 0.616 0.478
## shrub -0.516 0.148 -0.012 0.678 0.786 0.743 0.568 0.553
## water -0.364 0.156 0.008 0.498 0.583 0.545 0.424 0.411 0.488
## burn -0.509 0.084 0.004 0.641 0.733 0.702 0.526 0.515 0.624
## water
## Elevatn2_sc
## DstFrmHHA2_
## openConif
## modConif
## closedConif
## mixed
## herb
## shrub
## water
## burn 0.451
## see - nasty error messages are gone.
AIC(top.env2, top.env.mixed2, top.env.rd, top.env.bv)
## df AIC
## top.env2 11 1320.2833
## top.env.mixed2 12 1308.5431
## top.env.rd 11 396.4251
## top.env.bv 11 851.1538
# Warning message:
# In AIC.default(top.env, top.env.mixed, top.env.rd, top.env.bv) :
# models are not all fitted to the same number of observations
## Note that we get this error message because top.env.rd and top.env.bv are fit with only subsets of the data.
## However, recall that for 1 dataset, AIC and LL is additive. Thus, the 'additive' AIC of the two 'separate' models is
## 396.4251 + 851.1538 = 1247.58 which is substantively better than the mixed effect model
# Bring in data
elk <- read.table("lab7_elk_migrant.csv", header=TRUE, sep=",", na.strings="NA", dec=".", strip.white=TRUE)
head(elk)
## idn used elkuid migrant migcode year daynight Night season bioseason
## 1 26540 1 73 Migrant 1 2004 Night 1 summer s04
## 2 16500 1 78 Migrant 1 2004 Night 1 summer s04
## 3 18228 1 78 Migrant 1 2004 Night 1 summer s04
## 4 12199 1 96 Migrant 1 2004 Day 0 summer s04
## 5 16259 1 73 Migrant 1 2004 Night 1 summer s04
## 6 11820 1 59 Migrant 1 2004 Day 0 summer s04
## utmx utmy elevgis slope northsl flatslo southsl caspv v23 celev
## 1 544112.8 5714053 1950 29.21 1 0 0 2 10 4
## 2 558758.5 5703733 2169 21.64 0 0 1 5 10 3
## 3 559644.9 5699787 1922 17.14 0 0 1 3 10 3
## 4 573832.5 5741766 2136 26.29 0 0 1 3 10 4
## 5 545251.9 5713114 1858 15.60 1 0 0 2 10 3
## 6 573836.3 5741912 2186 22.70 0 0 1 3 10 4
## v25 hshade2 wetness green distdiv landcov landtype totalshrub
## 1 12 72 10.27 9 3.82 8 Shrubs 0
## 2 10 217 10.67 10 9.51 8 Shrubs 415
## 3 11 210 10.28 9 9.96 8 Shrubs 425
## 4 10 240 10.76 9 44.31 7 Herbaceous 227
## 5 11 130 10.85 9 3.86 8 Shrubs 484
## 6 10 235 10.56 9 44.42 7 Herbaceous 229
## totalforb totalherb ctotrisk ctotrisk2 riskforage for2 risk2
## 1 53 91 0.00037744 1.42e-07 0.0343470 8281 1.42e-07
## 2 56 90 0.00088534 7.84e-07 0.0796803 8100 7.84e-07
## 3 53 91 0.00093215 8.69e-07 0.0848261 8281 8.69e-07
## 4 63 91 0.00095381 9.10e-07 0.0867966 8281 9.10e-07
## 5 49 89 0.00064756 4.19e-07 0.0576325 7921 4.19e-07
## 6 64 90 0.00111038 1.23e-06 0.0999344 8100 1.23e-06
elk$elkuidF <- as.factor(elk$elkuid)
#### Get to know our data graphically
ggplot(elk, aes(x=utmx, y = utmy, color = elkuidF)) + geom_point()
## how about 'just' the telemetry locations?
elk.used <- subset(elk, elk$used == 1)
ggplot(elk.used, aes(x=utmx, y = utmy, color = elkuidF)) + geom_point()
##### What kind of sampling design is this?
# write.csv(elk.used, "lab8_elk_migrant_used.csv") ## we might want to use this later, like in Lab 8
# get to know our data
table(elk$elkuid, elk$year)
##
## 2002 2003 2004
## 2 0 1328 0
## 5 0 698 0
## 25 0 944 0
## 29 0 842 0
## 42 0 1313 0
## 56 0 0 417
## 59 0 0 928
## 73 0 0 1373
## 74 0 0 1197
## 78 0 0 903
## 90 0 0 316
## 92 0 0 1390
## 93 0 0 1453
## 94 0 0 1324
## 96 0 0 1399
## 104 0 0 1388
## 196 141 0 0
table(elk$elkuid, elk$used)
##
## 0 1
## 2 2398 1328
## 5 538 698
## 25 2372 944
## 29 3711 842
## 42 939 1313
## 56 729 417
## 59 284 928
## 73 468 1373
## 74 1432 1197
## 78 211 903
## 90 52 316
## 92 485 1390
## 93 884 1453
## 94 766 1324
## 96 1997 1399
## 104 1247 1388
## 196 112 141
hist(elk$ctotrisk)
hist(log(elk$ctotrisk))
summary(elk[,30:31]) ## So 1579 NA's predation risk values, and 11 NA's for total herbaceous vegetation
## totalherb ctotrisk
## Min. : 0.00 Min. :0.0001
## 1st Qu.: 0.00 1st Qu.:0.0010
## Median : 11.00 Median :0.0041
## Mean : 20.36 Mean :0.0470
## 3rd Qu.: 28.00 3rd Qu.:0.0200
## Max. :224.00 Max. :1.8900
## NA's :11 NA's :1579
length(elk$ctotrisk)
## [1] 35979
#### Subset dataset for complete.cases where there are no NA data - why is this important? How can we compare models using AIC with different number of rows?
elk2 <- elk[complete.cases(elk[30:31]), ]
summary(elk2)
## idn used elkuid migrant
## Min. : 1 Min. :0.0000 Min. : 2.00 Migrant:34390
## 1st Qu.:14900 1st Qu.:0.0000 1st Qu.: 29.00
## Median :27776 Median :0.0000 Median : 73.00
## Mean :32498 Mean :0.4956 Mean : 59.82
## 3rd Qu.:47834 3rd Qu.:1.0000 3rd Qu.: 93.00
## Max. :76102 Max. :1.0000 Max. :196.00
##
## migcode year daynight Night season
## Min. :1 Min. :2002 Day :18881 Min. :0.000 summer:34390
## 1st Qu.:1 1st Qu.:2003 Night:15509 1st Qu.:0.000
## Median :1 Median :2004 Median :0.000
## Mean :1 Mean :2004 Mean :0.451
## 3rd Qu.:1 3rd Qu.:2004 3rd Qu.:1.000
## Max. :1 Max. :2004 Max. :1.000
## NA's :17346
## bioseason utmx utmy elevgis
## s02: 5821 Min. :541527 Min. :5693412 Min. :1531
## s03:10832 1st Qu.:572150 1st Qu.:5711678 1st Qu.:1765
## s04:17737 Median :583955 Median :5721021 Median :2026
## Mean :581250 Mean :5722497 Mean :2028
## 3rd Qu.:596105 3rd Qu.:5732660 3rd Qu.:2258
## Max. :607515 Max. :5757673 Max. :3307
##
## slope northsl flatslo southsl
## Min. : 0.00 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.: 5.78 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :13.94 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :15.21 Mean :0.4477 Mean :0.09889 Mean :0.4534
## 3rd Qu.:23.34 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :60.26 Max. :1.0000 Max. :1.00000 Max. :1.0000
##
## caspv v23 celev v25
## Min. : 1.000 Min. : 5.000 Min. :1.000 Min. : 2.000
## 1st Qu.: 3.000 1st Qu.:10.000 1st Qu.:2.000 1st Qu.: 7.000
## Median : 5.000 Median :10.000 Median :3.000 Median : 9.000
## Mean : 4.877 Mean : 9.935 Mean :2.923 Mean : 8.328
## 3rd Qu.: 7.000 3rd Qu.:10.000 3rd Qu.:4.000 3rd Qu.:10.000
## Max. :10.000 Max. :10.000 Max. :9.000 Max. :17.000
##
## hshade2 wetness green distdiv
## Min. : 0 Min. : 6.51 Min. : 1.00 Min. : 1.34
## 1st Qu.:155 1st Qu.: 9.22 1st Qu.: 6.00 1st Qu.:30.04
## Median :180 Median :10.17 Median : 7.00 Median :43.78
## Mean :175 Mean :10.64 Mean : 6.65 Mean :39.05
## 3rd Qu.:202 3rd Qu.:11.40 3rd Qu.: 8.00 3rd Qu.:54.88
## Max. :254 Max. :27.29 Max. :10.00 Max. :66.06
##
## landcov landtype totalshrub
## Min. : 1.000 Moderate Conifer :8246 Min. : 0.0
## 1st Qu.: 2.000 Rock/Snow/Shadow :6547 1st Qu.: 0.0
## Median : 7.000 Closed Conifer :3577 Median :134.0
## Mean : 6.871 Alpine-Herbaceous:3409 Mean :152.1
## 3rd Qu.:10.000 Shrubs :3200 3rd Qu.:248.0
## Max. :16.000 Open Conifer :2990 Max. :724.0
## (Other) :6421 NA's :17346
## totalforb totalherb ctotrisk ctotrisk2
## Min. : 0.00 Min. : 0.00 Min. :0.0001000 Min. :0.000000
## 1st Qu.: 4.00 1st Qu.: 0.00 1st Qu.:0.0009938 1st Qu.:0.000001
## Median : 15.00 Median : 12.00 Median :0.0041379 Median :0.000017
## Mean : 19.81 Mean : 21.09 Mean :0.0469646 Mean :0.015396
## 3rd Qu.: 27.00 3rd Qu.: 29.00 3rd Qu.:0.0200000 3rd Qu.:0.000400
## Max. :152.00 Max. :224.00 Max. :1.8900000 Max. :3.572100
## NA's :17346
## riskforage for2 risk2 elkuidF
## Min. : 0.00000 Min. : 0 Min. :0.000000 29 : 4174
## 1st Qu.: 0.00000 1st Qu.: 0 1st Qu.:0.000001 2 : 3539
## Median : 0.03783 Median : 144 Median :0.000017 96 : 3196
## Mean : 1.96913 Mean : 1131 Mean :0.015396 25 : 3183
## 3rd Qu.: 0.38577 3rd Qu.: 841 3rd Qu.:0.000400 104 : 2579
## Max. :138.32000 Max. :50176 Max. :3.572100 74 : 2529
## (Other):15190
length(elk2$ctotrisk)
## [1] 34390
## Still need to clean up predation risk data being predicted > 1
elk2$ctotrisk[elk2$ctotrisk>1]=1
table(elk2$elkuid, elk2$year)
##
## 2002 2003 2004
## 2 0 1328 0
## 5 0 688 0
## 25 0 924 0
## 29 0 840 0
## 42 0 1313 0
## 56 0 0 417
## 59 0 0 925
## 73 0 0 1241
## 74 0 0 1195
## 78 0 0 896
## 90 0 0 198
## 92 0 0 1378
## 93 0 0 1453
## 94 0 0 1324
## 96 0 0 1397
## 104 0 0 1386
## 196 141 0 0
table(elk2$elkuid, elk2$used)
##
## 0 1
## 2 2211 1328
## 5 510 688
## 25 2259 924
## 29 3334 840
## 42 913 1313
## 56 725 417
## 59 243 925
## 73 437 1241
## 74 1334 1195
## 78 203 896
## 90 42 198
## 92 466 1378
## 93 799 1453
## 94 766 1324
## 96 1799 1397
## 104 1193 1386
## 196 112 141
# Compute sample size for each elk
n = tapply(elk$idn, elk$elkuid,length)
n
## 2 5 25 29 42 56 59 73 74 78 90 92 93 94 96
## 3726 1236 3316 4553 2252 1146 1212 1841 2629 1114 368 1875 2337 2090 3396
## 104 196
## 2635 253
# Calculate mean wolf predation risk
wolf = tapply(na.omit(elk2$ctotrisk), elk2$elkuid[which((elk2$ctotrisk!="NA")==TRUE)],mean)
wolf
## 2 5 25 29 42
## 0.0778728331 0.0114242478 0.0551941567 0.0410911816 0.0414572404
## 56 59 73 74 78
## 0.1280973679 0.0025127583 0.0016995864 0.0375634085 0.0015819426
## 90 92 93 94 96
## 0.0009772592 0.0438023751 0.0054958079 0.1224840648 0.0465707038
## 104 196
## 0.0197130435 0.3177843021
hist(wolf)
### this shows a wide variation in the exposure of elk to wolf predation risk
forage = tapply(na.omit(elk2$totalherb), elk$elkuid[which((elk2$totalherb!="NA")==TRUE)],mean)
forage
## 2 5 25 29 42 56 59 73
## 17.67354 12.64315 13.81882 11.29948 20.69169 29.69594 20.90383 25.48031
## 74 78 90 92 93 94 96 104
## 20.65224 24.12918 23.65015 21.56490 27.80511 47.33946 20.64337 22.36901
## 196
## 19.67089
hist(forage)
##### 2.1 Scaling risk and forage
str(elk2)
## 'data.frame': 34390 obs. of 36 variables:
## $ idn : int 26540 16500 18228 12199 16259 11820 11838 22804 14758 20287 ...
## $ used : int 1 1 1 1 1 1 1 1 1 1 ...
## $ elkuid : int 73 78 78 96 73 59 59 96 73 74 ...
## $ migrant : Factor w/ 1 level "Migrant": 1 1 1 1 1 1 1 1 1 1 ...
## $ migcode : int 1 1 1 1 1 1 1 1 1 1 ...
## $ year : int 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 ...
## $ daynight : Factor w/ 2 levels "Day","Night": 2 2 2 1 2 1 1 1 1 1 ...
## $ Night : int 1 1 1 0 1 0 0 0 0 0 ...
## $ season : Factor w/ 1 level "summer": 1 1 1 1 1 1 1 1 1 1 ...
## $ bioseason : Factor w/ 3 levels "s02","s03","s04": 3 3 3 3 3 3 3 3 3 3 ...
## $ utmx : num 544113 558758 559645 573832 545252 ...
## $ utmy : int 5714053 5703733 5699787 5741766 5713114 5741912 5741896 5741380 5707639 5708554 ...
## $ elevgis : int 1950 2169 1922 2136 1858 2186 2177 2124 2004 2329 ...
## $ slope : num 29.2 21.6 17.1 26.3 15.6 ...
## $ northsl : int 1 0 0 0 1 0 0 0 1 0 ...
## $ flatslo : int 0 0 0 0 0 0 0 0 0 0 ...
## $ southsl : int 0 1 1 1 0 1 1 1 0 1 ...
## $ caspv : int 2 5 3 3 2 3 3 2 6 6 ...
## $ v23 : int 10 10 10 10 10 10 10 10 10 10 ...
## $ celev : int 4 3 3 4 3 4 4 4 5 4 ...
## $ v25 : int 12 10 11 10 11 10 10 9 11 11 ...
## $ hshade2 : int 72 217 210 240 130 235 235 243 40 172 ...
## $ wetness : num 10.3 10.7 10.3 10.8 10.8 ...
## $ green : int 9 10 9 9 9 9 9 8 9 9 ...
## $ distdiv : num 3.82 9.51 9.96 44.31 3.86 ...
## $ landcov : int 8 8 8 7 8 7 7 7 8 16 ...
## $ landtype : Factor w/ 17 levels "","Alpine-Herbaceous",..: 16 16 16 11 16 11 11 11 16 3 ...
## $ totalshrub: int 0 415 425 227 484 229 230 211 646 273 ...
## $ totalforb : int 53 56 53 63 49 64 64 64 55 74 ...
## $ totalherb : int 91 90 91 91 89 90 90 89 93 92 ...
## $ ctotrisk : num 0.000377 0.000885 0.000932 0.000954 0.000648 ...
## $ ctotrisk2 : num 1.42e-07 7.84e-07 8.69e-07 9.10e-07 4.19e-07 1.23e-06 1.34e-06 7.07e-07 1.70e-08 9.04e-07 ...
## $ riskforage: num 0.0343 0.0797 0.0848 0.0868 0.0576 ...
## $ for2 : int 8281 8100 8281 8281 7921 8100 8100 7921 8649 8464 ...
## $ risk2 : num 1.42e-07 7.84e-07 8.69e-07 9.10e-07 4.19e-07 1.23e-06 1.34e-06 7.07e-07 1.70e-08 9.04e-07 ...
## $ elkuidF : Factor w/ 17 levels "2","5","25","29",..: 8 10 10 15 8 7 7 15 8 9 ...
elk2$totalherb_sc <- scale(elk2$totalherb)
elk2$ctotrisk_sc <- scale(elk2$ctotrisk)
elk2$ctotrisk2_sc <- scale(elk2$ctotrisk2)
elk2$riskforage_sc <- scale(elk2$riskforage)
elk2$for2_sc <- scale(elk2$for2)
elk2$risk2_sc <- scale(elk2$risk2)
##### AGain, just to double check what scale is doing
plot(elk2$ctotrisk_sc, elk2$ctotrisk)
### Fitting best model(s)
forage = glm(used~totalherb, data=elk2,family=binomial(link="logit"))
risk = glm(used~ctotrisk, data=elk2,family=binomial(link="logit"))
forANDrisk = glm(used~totalherb+ctotrisk, data=elk2,family=binomial(link="logit"))
forrisk = glm(used~totalherb+ctotrisk+ctotrisk*totalherb, data=elk2,family=binomial(link="logit"))
AIC(forage, risk, forANDrisk, forrisk)
## df AIC
## forage 2 42568.69
## risk 2 47525.47
## forANDrisk 3 42428.53
## forrisk 4 42215.78
## Best model from AIC perspective is forrisk
summary(forrisk)
##
## Call:
## glm(formula = used ~ totalherb + ctotrisk + ctotrisk * totalherb,
## family = binomial(link = "logit"), data = elk2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.0397 -0.9646 -0.8637 1.0778 1.5279
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7938903 0.0167247 -47.468 < 2e-16 ***
## totalherb 0.0450247 0.0007724 58.290 < 2e-16 ***
## ctotrisk 0.4855086 0.1622428 2.992 0.00277 **
## totalherb:ctotrisk -0.0589087 0.0037465 -15.724 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 47672 on 34389 degrees of freedom
## Residual deviance: 42208 on 34386 degrees of freedom
## AIC: 42216
##
## Number of Fisher Scoring iterations: 4
## Refit top model with random effects
forrisk_sc = glm(used~totalherb_sc+ctotrisk_sc+ctotrisk_sc*totalherb_sc, data=elk2,family=binomial(link="logit"))
summary(forrisk_sc)
##
## Call:
## glm(formula = used ~ totalherb_sc + ctotrisk_sc + ctotrisk_sc *
## totalherb_sc, family = binomial(link = "logit"), data = elk2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.0397 -0.9646 -0.8637 1.0778 1.5279
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.12031 0.01269 9.483 < 2e-16 ***
## totalherb_sc 1.10718 0.01828 60.565 < 2e-16 ***
## ctotrisk_sc -0.08504 0.01321 -6.439 1.2e-10 ***
## totalherb_sc:ctotrisk_sc -0.17336 0.01103 -15.724 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 47672 on 34389 degrees of freedom
## Residual deviance: 42208 on 34386 degrees of freedom
## AIC: 42216
##
## Number of Fisher Scoring iterations: 4
# Calculate some summary statistics for forage
hist(elk$totalherb)
hist(log(elk$totalherb))
quantile(elk$totalherb,na.rm=TRUE)
## 0% 25% 50% 75% 100%
## 0 0 11 28 224
mean(elk$totalherb,na.rm=TRUE)
## [1] 20.36407
herb.lo = 5
herb.med = 15
herb.hi = 50
# Make predictions
predrisk = seq(0,1,0.01)
pred.lo = predict(forrisk,type="response", newdata = data.frame(totalherb=herb.lo, ctotrisk=predrisk))
pred.med = predict(forrisk,type="response", newdata = data.frame(totalherb=herb.med, ctotrisk=predrisk))
pred.hi = predict(forrisk,type="response", newdata = data.frame(totalherb=herb.hi, ctotrisk=predrisk))
# Make plot
plot(elk2$ctotrisk,elk2$used, xlab="Risk", ylab="Pr(Use)")
lines(predrisk,pred.lo, lty=1)
lines(predrisk,pred.med, lty=2)
lines(predrisk,pred.hi, lty=3)
legend(x=0.7,y=0.95, legend=c("Observed","Low Forage","Medium Forage","High Forage"), pch=c(1,rep(-1,3)),lty=c(-1,1:3),bty="n")
elk2$usedF <- as.factor(elk2$used)
ggplot(elk2, aes(x=ctotrisk, y = used)) + stat_smooth(method="glm", method.args = list(family="binomial"))
ggplot(elk2, aes(x=totalherb, y = used)) + stat_smooth(method="glm", method.args = list(family="binomial"))
ggplot(elk2, aes(x=riskforage, y = used)) + geom_rug() + stat_smooth(method="glm", method.args = list(family="binomial"))
#### But what does this last plot mean? Need to make a categorical variable at 3 different levels of forage
## Note I use the Hmisc package here to split into categories
elk2$forage.cat <- as.factor(as.numeric(cut2(elk2$totalherb, g=3)))
elk2$forage.cat <- cut2(elk2$totalherb, g=3)
ggplot(elk2, aes(x=ctotrisk, y = used, fill = forage.cat)) + stat_smooth(method="glm", method.args = list(family="binomial"))
elk2$risk.cat <- cut2(elk2$ctotrisk, g=3)
ggplot(elk2, aes(x=totalherb, y = used, fill = risk.cat)) + stat_smooth(method="glm", method.args = list(family="binomial"))
##### Both graphs show that the effect of the interaction is only really present at low forage levels when elk select for high predation risk, but otherwise they select high forage biomass at all times, just less when under high predation risk.
mep(forrisk_sc)
### But note it does not do interactive effects
## This next command vcovHC generates the default heteroskedastic robust standard errors in R
forrisk2 <-vcovHC(forrisk, elk$elkuid, type = "const", sandwhich = TRUE)
forrisk2
## (Intercept) totalherb ctotrisk
## (Intercept) 0.125299083 -0.0060574616 -0.49719924
## totalherb -0.006057462 0.0004609089 0.01969094
## ctotrisk -0.497199245 0.0196909374 11.29335541
## totalherb:ctotrisk 0.018132727 -0.0012165426 -0.25027016
## totalherb:ctotrisk
## (Intercept) 0.018132727
## totalherb -0.001216543
## ctotrisk -0.250270158
## totalherb:ctotrisk 0.008529721
coeftest(forrisk, forrisk2)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.793890 0.353976 -2.2428 0.02491 *
## totalherb 0.045025 0.021469 2.0972 0.03597 *
## ctotrisk 0.485509 3.360559 0.1445 0.88513
## totalherb:ctotrisk -0.058909 0.092356 -0.6378 0.52358
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## now fit model with fixed effect for each individual elk
forriskFI = glm(used~totalherb+ctotrisk+ctotrisk*totalherb+elkuidF, data=elk,family=binomial(link="logit"))
summary(forriskFI)
##
## Call:
## glm(formula = used ~ totalherb + ctotrisk + ctotrisk * totalherb +
## elkuidF, family = binomial(link = "logit"), data = elk)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.7927 -0.8965 -0.5560 0.9489 1.9715
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.1051164 0.0387621 -28.510 < 2e-16 ***
## totalherb 0.0391708 0.0008021 48.838 < 2e-16 ***
## ctotrisk 1.4461134 0.1727601 8.371 < 2e-16 ***
## elkuidF5 0.9896113 0.0703386 14.069 < 2e-16 ***
## elkuidF25 -0.2744382 0.0545134 -5.034 4.80e-07 ***
## elkuidF29 -0.6838856 0.0539234 -12.683 < 2e-16 ***
## elkuidF42 0.6783358 0.0582501 11.645 < 2e-16 ***
## elkuidF56 -0.5268584 0.0775606 -6.793 1.10e-11 ***
## elkuidF59 1.8831422 0.0836697 22.507 < 2e-16 ***
## elkuidF73 1.2534254 0.0689722 18.173 < 2e-16 ***
## elkuidF74 0.2151677 0.0567871 3.789 0.000151 ***
## elkuidF78 1.7463187 0.0880810 19.826 < 2e-16 ***
## elkuidF90 1.8138242 0.1770167 10.247 < 2e-16 ***
## elkuidF92 1.5737968 0.0668893 23.528 < 2e-16 ***
## elkuidF93 0.7547853 0.0615857 12.256 < 2e-16 ***
## elkuidF94 0.1034055 0.0654932 1.579 0.114365
## elkuidF96 0.1119578 0.0534965 2.093 0.036367 *
## elkuidF104 0.3946278 0.0567433 6.955 3.54e-12 ***
## elkuidF196 0.5028438 0.1398406 3.596 0.000323 ***
## totalherb:ctotrisk -0.0473504 0.0038199 -12.396 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 47672 on 34389 degrees of freedom
## Residual deviance: 38921 on 34370 degrees of freedom
## (1589 observations deleted due to missingness)
## AIC: 38961
##
## Number of Fisher Scoring iterations: 4
##### Discussion - what does each coefficient mean in a used-available design?
##### What is the correspondance between the sampling ratio of used to available locations and the coefficient?
table(elk2$elkuid, elk2$used)
##
## 0 1
## 2 2211 1328
## 5 510 688
## 25 2259 924
## 29 3334 840
## 42 913 1313
## 56 725 417
## 59 243 925
## 73 437 1241
## 74 1334 1195
## 78 203 896
## 90 42 198
## 92 466 1378
## 93 799 1453
## 94 766 1324
## 96 1799 1397
## 104 1193 1386
## 196 112 141
##### Check for elk 196, 141/(141+1120) = 0.557; B is 0.502 - close enough in multiple logit model with continuous covariates
##### Why not just use this fixed effect?
AIC(forriskFI, forage, risk, forANDrisk, forrisk)
## df AIC
## forriskFI 20 38961.48
## forage 2 42568.69
## risk 2 47525.47
## forANDrisk 3 42428.53
## forrisk 4 42215.78
#### Fixed effects model for each individual
elkids = sort(unique(elk2$elkuid))
modelnames = paste("elk",elkids)
models = list()
for (i in 1:length(elkids)){
models[[i]]=glm(used~totalherb_sc+ctotrisk_sc+ctotrisk_sc*totalherb_sc, data=elk2,subset = elkuid==elkids[i], family=binomial(link="logit"))
}
names(models)=modelnames
# lapply(models,summary) #### Note I supressed this because it just spits out 17 models, 1 for each elk
# This creates a dataframe with the beta coefficients for each model/elk
coefs = as.data.frame(lapply(models, coef))
coefs
## elk.2 elk.5 elk.25 elk.29
## (Intercept) -0.6869455 -3.688158 -0.829788220 -1.1524225
## totalherb_sc -0.2663299 -3.251812 0.192937628 0.4886864
## ctotrisk_sc 0.8316429 -10.532198 -0.008045513 0.2665379
## totalherb_sc:ctotrisk_sc -0.3007834 -8.321302 0.026228478 -0.1156154
## elk.42 elk.56 elk.59 elk.73
## (Intercept) 0.6835120 -0.68528659 -19.18118 -9.394086
## totalherb_sc 0.3410619 1.48605381 11.49317 -10.666046
## ctotrisk_sc 1.2141372 -0.44268097 -52.41802 -27.132288
## totalherb_sc:ctotrisk_sc -1.2114773 -0.04525429 24.77480 -37.846209
## elk.74 elk.78 elk.90 elk.92
## (Intercept) -0.09685491 -7.7577614 4.587439 1.27108698
## totalherb_sc 0.99475243 0.2366856 -29.601970 0.99184915
## ctotrisk_sc -0.32432822 -23.6199885 6.803137 -0.41496111
## totalherb_sc:ctotrisk_sc -0.01430213 -6.2706274 -78.839052 -0.01386393
## elk.93 elk.94 elk.96 elk.104
## (Intercept) 2.7379170 -0.41491070 -0.1966565 -0.3563434
## totalherb_sc -0.5800035 1.47577645 1.3417345 2.1078975
## ctotrisk_sc 5.7292566 -0.27677408 -0.4644044 -2.0175288
## totalherb_sc:ctotrisk_sc -5.1708850 0.02966996 0.0496705 0.3361079
## elk.196
## (Intercept) -1.4266720
## totalherb_sc -0.8751487
## ctotrisk_sc 0.7051193
## totalherb_sc:ctotrisk_sc -0.2481602
#Calculate means for each of the coefficients
mean(as.numeric(coefs[1,]))
## [1] -2.152183
mean(as.numeric(coefs[2,]))
## [1] -1.4171
mean(as.numeric(coefs[3,]))
## [1] -6.005964
mean(as.numeric(coefs[4,]))
## [1] -6.657709
##### Therefore, the linear part of the two-staged model would be:
##### -2.15 - 1.417*totalherb_sc -6.006*ctotrisk_sc + 6.657*totalherb_sc:ctotrisk_sc
##### LEts make some graphical displays of the Beta coefficients across individuals
par(mfrow=c(2,2))
hist(as.numeric(coefs[1,]), main="intercept",breaks=10)
hist(as.numeric(coefs[2,]), main="Forage",breaks=10)
hist(as.numeric(coefs[3,]), main ="Risk",breaks=10)
hist(as.numeric(coefs[4,]), main="Forage*Risk",breaks=10)
##### So a biological conclusions is that there is substantial variation between individuals in their coefficients for forage, wolf predation risk and the interaction.
fr.ri = glmer(used~totalherb_sc+ctotrisk_sc+ctotrisk_sc*totalherb_sc+(1|elkuid), data=elk2,family=binomial(link="logit"), verbose=FALSE)
summary(fr.ri)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: used ~ totalherb_sc + ctotrisk_sc + ctotrisk_sc * totalherb_sc +
## (1 | elkuid)
## Data: elk2
##
## AIC BIC logLik deviance df.resid
## 39038.6 39080.8 -19514.3 39028.6 34385
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -36.423 -0.704 -0.409 0.752 2.443
##
## Random effects:
## Groups Name Variance Std.Dev.
## elkuid (Intercept) 0.624 0.7899
## Number of obs: 34390, groups: elkuid, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.35897 0.19245 1.87 0.062144 .
## totalherb_sc 0.96747 0.01913 50.58 < 2e-16 ***
## ctotrisk_sc 0.05316 0.01468 3.62 0.000293 ***
## totalherb_sc:ctotrisk_sc -0.14112 0.01133 -12.45 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ttlhr_ cttrs_
## totalhrb_sc 0.018
## ctotrisk_sc -0.004 -0.236
## ttlhrb_sc:_ -0.016 -0.308 -0.353
fr.ri2 = glmer(used~totalherb_sc+ctotrisk_sc+ctotrisk_sc*totalherb_sc+(1|elkuid), data=elk2,family=binomial(link="logit"), verbose=FALSE, nAGQ = 10)
summary(fr.ri2)
## Generalized linear mixed model fit by maximum likelihood (Adaptive
## Gauss-Hermite Quadrature, nAGQ = 10) [glmerMod]
## Family: binomial ( logit )
## Formula: used ~ totalherb_sc + ctotrisk_sc + ctotrisk_sc * totalherb_sc +
## (1 | elkuid)
## Data: elk2
##
## AIC BIC logLik deviance df.resid
## 39038.6 39080.8 -19514.3 39028.6 34385
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -36.423 -0.704 -0.409 0.752 2.443
##
## Random effects:
## Groups Name Variance Std.Dev.
## elkuid (Intercept) 0.624 0.7899
## Number of obs: 34390, groups: elkuid, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.35899 0.19244 1.87 0.062121 .
## totalherb_sc 0.96747 0.01913 50.58 < 2e-16 ***
## ctotrisk_sc 0.05316 0.01468 3.62 0.000294 ***
## totalherb_sc:ctotrisk_sc -0.14112 0.01133 -12.45 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ttlhr_ cttrs_
## totalhrb_sc 0.018
## ctotrisk_sc -0.004 -0.236
## ttlhrb_sc:_ -0.016 -0.308 -0.353
# Useful functions
fixef(fr.ri) # This is the fixed effects coefficients
## (Intercept) totalherb_sc ctotrisk_sc
## 0.35897205 0.96746832 0.05315995
## totalherb_sc:ctotrisk_sc
## -0.14111762
ranef(fr.ri) # These are the random effects, which in this model is just (1|elkuid), so, one coefficient for each individual elk
## $elkuid
## (Intercept)
## 2 -0.6165108
## 5 0.3715122
## 25 -0.8892254
## 29 -1.2973817
## 42 0.0615043
## 56 -1.1359573
## 59 1.2560879
## 73 0.6341838
## 74 -0.4005106
## 78 1.1191628
## 90 1.1440496
## 92 0.9525073
## 93 0.1383113
## 94 -0.5103378
## 96 -0.5030820
## 104 -0.2208897
## 196 -0.1153650
##### Compare these to the fixed effect coefficients estimated with an intercept for each individual elk? How do they compare?
coef(fr.ri) ### Note this shows you the entire coefficient matrix for all individual elk - note that the rest of the model is fixed for each individual elk.
## $elkuid
## (Intercept) totalherb_sc ctotrisk_sc totalherb_sc:ctotrisk_sc
## 2 -0.25753874 0.9674683 0.05315995 -0.1411176
## 5 0.73048426 0.9674683 0.05315995 -0.1411176
## 25 -0.53025340 0.9674683 0.05315995 -0.1411176
## 29 -0.93840963 0.9674683 0.05315995 -0.1411176
## 42 0.42047635 0.9674683 0.05315995 -0.1411176
## 56 -0.77698522 0.9674683 0.05315995 -0.1411176
## 59 1.61505997 0.9674683 0.05315995 -0.1411176
## 73 0.99315583 0.9674683 0.05315995 -0.1411176
## 74 -0.04153855 0.9674683 0.05315995 -0.1411176
## 78 1.47813482 0.9674683 0.05315995 -0.1411176
## 90 1.50302168 0.9674683 0.05315995 -0.1411176
## 92 1.31147935 0.9674683 0.05315995 -0.1411176
## 93 0.49728339 0.9674683 0.05315995 -0.1411176
## 94 -0.15136580 0.9674683 0.05315995 -0.1411176
## 96 -0.14410997 0.9674683 0.05315995 -0.1411176
## 104 0.13808236 0.9674683 0.05315995 -0.1411176
## 196 0.24360704 0.9674683 0.05315995 -0.1411176
##
## attr(,"class")
## [1] "coef.mer"
# Compare intercepts from two step and mixed-effects model
# Put everything in one place; I add the fixed intercept to the random effects here.
# But you could also get this from coef(fr.ri)
B0 <- cbind(as.numeric(coefs[1,]),ranef(fr.ri)$elkuid[,1]+fixef(fr.ri)[1])
rownames(B0)=rownames(ranef(fr.ri)$elkuid)
colnames(B0) = c("Two-Step","Random Effects")
str(B0)
## num [1:17, 1:2] -0.687 -3.688 -0.83 -1.152 0.684 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:17] "2" "5" "25" "29" ...
## ..$ : chr [1:2] "Two-Step" "Random Effects"
B0
## Two-Step Random Effects
## 2 -0.68694548 -0.25753874
## 5 -3.68815760 0.73048426
## 25 -0.82978822 -0.53025340
## 29 -1.15242253 -0.93840963
## 42 0.68351196 0.42047635
## 56 -0.68528659 -0.77698522
## 59 -19.18118331 1.61505997
## 73 -9.39408620 0.99315583
## 74 -0.09685491 -0.04153855
## 78 -7.75776140 1.47813482
## 90 4.58743932 1.50302168
## 92 1.27108698 1.31147935
## 93 2.73791698 0.49728339
## 94 -0.41491070 -0.15136580
## 96 -0.19665647 -0.14410997
## 104 -0.35634342 0.13808236
## 196 -1.42667197 0.24360704
par(mfrow=c(1,1))
plot(B0[,1], B0[, 2])
abline(lm(B0[,1] ~ B0[, 2]))
#### This plot shows the Two-step coefficients for each individual elk on the X axix, and the random coefficient for each individual elk from the GLMM on the Y-axis. What do you notice? What are the axes of Y and X here? Should there be a relationship between these two?
# Make a histogram
multhist(list(B0[,1],B0[,2]), xlab = "Intercept Coefficient",ylab="Frequency (# Elk)", col = c("gray","tan"))
legend("topright", legend = colnames(B0), fill = c("gray","tan"), bty = "n")
##### This histogram shows us that the mixed-effects model is estimating the distribution of random intercepts here as ‘more’ gaussian/normally distributed than the Two-stage data.
fr.rc = glmer(used~totalherb_sc+ctotrisk_sc+totalherb_sc*ctotrisk_sc+(ctotrisk_sc|elkuid), data=elk2,family=binomial(link="logit"), verbose=FALSE)
fixef(fr.rc) # This is the fixed effects coefficients
## (Intercept) totalherb_sc ctotrisk_sc
## -1.3476356 0.9977271 -4.0798003
## totalherb_sc:ctotrisk_sc
## -0.1339494
ranef(fr.rc) # These are the random effects, which in this model is just (1|elkuid), so, one coefficient for each individual elk
## $elkuid
## (Intercept) ctotrisk_sc
## 2 1.0065743 4.412453
## 5 -1.9594458 -6.573325
## 25 0.8565098 3.886802
## 29 0.4056811 4.264239
## 42 1.8008209 4.432460
## 56 0.6856829 3.960949
## 59 -9.8807933 -28.265057
## 73 -1.6320233 -5.778112
## 74 1.2846947 3.831500
## 78 -0.8906782 -5.099035
## 90 -1.0180528 -5.365349
## 92 2.6447431 3.714424
## 93 2.5845402 6.100845
## 94 1.0761510 4.329795
## 96 1.2005060 4.036728
## 104 1.3594070 3.630032
## 196 0.5456083 4.643834
coef(fr.rc)
## $elkuid
## (Intercept) totalherb_sc ctotrisk_sc totalherb_sc:ctotrisk_sc
## 2 -0.34106135 0.9977271 0.33265316 -0.1339494
## 5 -3.30708146 0.9977271 -10.65312537 -0.1339494
## 25 -0.49112588 0.9977271 -0.19299786 -0.1339494
## 29 -0.94195450 0.9977271 0.18443866 -0.1339494
## 42 0.45318528 0.9977271 0.35266009 -0.1339494
## 56 -0.66195270 0.9977271 -0.11885176 -0.1339494
## 59 -11.22842890 0.9977271 -32.34485700 -0.1339494
## 73 -2.97965897 0.9977271 -9.85791187 -0.1339494
## 74 -0.06294094 0.9977271 -0.24830047 -0.1339494
## 78 -2.23831379 0.9977271 -9.17883501 -0.1339494
## 90 -2.36568839 0.9977271 -9.44514953 -0.1339494
## 92 1.29710747 0.9977271 -0.36537584 -0.1339494
## 93 1.23690457 0.9977271 2.02104502 -0.1339494
## 94 -0.27148467 0.9977271 0.24999466 -0.1339494
## 96 -0.14712966 0.9977271 -0.04307225 -0.1339494
## 104 0.01177136 0.9977271 -0.44976846 -0.1339494
## 196 -0.80202731 0.9977271 0.56403388 -0.1339494
##
## attr(,"class")
## [1] "coef.mer"
##### Note here that the coefficient for predation risk is allowed to vary
summary(fr.rc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: used ~ totalherb_sc + ctotrisk_sc + totalherb_sc * ctotrisk_sc +
## (ctotrisk_sc | elkuid)
## Data: elk2
##
## AIC BIC logLik deviance df.resid
## 38338.0 38397.1 -19162.0 38324.0 34383
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -44 -1 0 1 64056
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## elkuid (Intercept) 8.53 2.921
## ctotrisk_sc 72.82 8.533 0.97
## Number of obs: 34390, groups: elkuid, 17
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.34764 0.64061 -2.10 0.0354 *
## totalherb_sc 0.99773 0.01972 50.60 <2e-16 ***
## ctotrisk_sc -4.07980 1.83073 -2.23 0.0258 *
## totalherb_sc:ctotrisk_sc -0.13395 0.01414 -9.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ttlhr_ cttrs_
## totalhrb_sc -0.015
## ctotrisk_sc 0.965 -0.019
## ttlhrb_sc:_ 0.004 -0.206 0.005
# Note that I use the coef function here
B0.rc = cbind(as.numeric(coefs[1,]),coef(fr.rc)$elkuid[,1])
rownames(B0.rc)=rownames(ranef(fr.ri)$elkuid)
colnames(B0.rc) = c("Two-Step","Random Effects")
B.risk = cbind(as.numeric(coefs[3,]),coef(fr.rc)$elkuid[,3])
rownames(B.risk)=rownames(ranef(fr.ri)$elkuid)
colnames(B.risk) = c("Two-Step","Random Effects")
## lets look at the Intercepts
B0.rc
## Two-Step Random Effects
## 2 -0.68694548 -0.34106135
## 5 -3.68815760 -3.30708146
## 25 -0.82978822 -0.49112588
## 29 -1.15242253 -0.94195450
## 42 0.68351196 0.45318528
## 56 -0.68528659 -0.66195270
## 59 -19.18118331 -11.22842890
## 73 -9.39408620 -2.97965897
## 74 -0.09685491 -0.06294094
## 78 -7.75776140 -2.23831379
## 90 4.58743932 -2.36568839
## 92 1.27108698 1.29710747
## 93 2.73791698 1.23690457
## 94 -0.41491070 -0.27148467
## 96 -0.19665647 -0.14712966
## 104 -0.35634342 0.01177136
## 196 -1.42667197 -0.80202731
## Next, lets look at the risk coefficients
B.risk
## Two-Step Random Effects
## 2 0.831642892 0.33265316
## 5 -10.532197770 -10.65312537
## 25 -0.008045513 -0.19299786
## 29 0.266537869 0.18443866
## 42 1.214137203 0.35266009
## 56 -0.442680972 -0.11885176
## 59 -52.418020977 -32.34485700
## 73 -27.132288060 -9.85791187
## 74 -0.324328217 -0.24830047
## 78 -23.619988516 -9.17883501
## 90 6.803137015 -9.44514953
## 92 -0.414961114 -0.36537584
## 93 5.729256627 2.02104502
## 94 -0.276774082 0.24999466
## 96 -0.464404415 -0.04307225
## 104 -2.017528782 -0.44976846
## 196 0.705119288 0.56403388
##### So, quite correlated from the 2 different models
plot(B.risk[,1], B.risk[,2])
abline(lm(B.risk[,1] ~ B.risk[, 2]))
##### So here there is some correlation between the coefficients for predation risk from the Two-stage models (X-axis) and the glmm (Y axis)
# Make histogram of betas
par(mfrow = c(1,2))
multhist(list(B0.rc[,1],B0.rc[,2]), xlab = "Intercept Coefficient",ylab="Frequency (# Elk)", col = c("gray","tan"), ylim=c(0,10))
legend(2.4,10, legend = colnames(B0), fill = c("gray","tan"), bty = "n")
multhist(list(B.risk[,1],B.risk[,2]), xlab = "Risk Coefficient",ylab="Frequency (# Elk)", col = c("gray","tan"),ylim = c(0,10))
legend(2.4,10, legend = colnames(B0), fill = c("gray","tan"), bty = "n")
##### What is the assumption about the distribution of the random effects doing to the modeled responses here?
AIC(forriskFI, forage, risk, forANDrisk, forrisk, fr.ri, fr.rc)
## df AIC
## forriskFI 20 38961.48
## forage 2 42568.69
## risk 2 47525.47
## forANDrisk 3 42428.53
## forrisk 4 42215.78
## fr.ri 5 39038.60
## fr.rc 7 38337.99
##### First lets refit the top model with out scaled coefficients
fr.rc2 = glmer(used~totalherb+ctotrisk+totalherb*ctotrisk+(ctotrisk|elkuid), data=elk2,family=binomial(link="logit"), verbose=FALSE)
# Plotting predictions from random coefficient model
# First remove NAs from the data
elk.c = elk2[which((elk2$ctotrisk!="NA"&elk2$totalherb!="NA")==TRUE),]
#Plot observed use
par(mfrow = c(1,1))
plot(elk.c$ctotrisk, elk.c$used,xlab="Risk",ylab="Pr(Use)")
# Set some variables up for nice plotting in a loop over each elk
elkids = sort(unique(elk.c$elkuid))
ltypes = rep(1:6,each=3)
lwide = rep(seq(1,3,1),each = 6)
colors = rep(c("red","black","seagreen", "violet", "tan", "orange"),3)
# Begin loop
for (i in elkids){
# To plot predictions you need to create new data for just that elk
dat = as.data.frame(model.matrix(terms(fr.rc2),elk.c)[elk.c$elkuid==i,])
dat$totalherb = mean(dat$totalherb) # Use the mean forage for an elk
dat[,colnames(dat)=="totalherb:ctotrisk"] = dat$totalherb*dat$ctotrisk # Recalculate the interaction term with the mean forage
dat$pred.prob = plogis(as.matrix(dat)%*%t(as.vector(coef(fr.rc2)$elkuid[which(elkids==i),]))) # Use matrix algebra to get prediction based on coefficients for each individual elk
ord = order(dat$ctotrisk) # store the order we want to plot in
# Draw a line for you prediction
lines(dat$ctotrisk[ord], dat$pred.prob[ord], lty=ltypes[which(elkids==i)],lwd = lwide[which(elkids==i)],col=colors[which(elkids==i)])
}
legend("right", legend = c("Observed", paste("Elk ",elkids)), pch=c(1,rep(-1,length(elkids))),lty = c(-1,ltypes[1:length(elkids)]),lwd = c(-1,lwide[1:length(elkids)]),col = c(1,colors[1:length(elkids)]), bty = "n")
ggplot(elk2, aes(x=ctotrisk, y = used, colour = elkuidF)) + stat_smooth(method="glm", method.args = list(family="binomial"))
###### Although a few things are wrong with this model. First, the model predictions are based on the fixed-effects, and fixed-effects variance (only) not taking into account the random effects variance. So the 95% CI’s are almost certainly overly precise compared to if they had included the random effects variance.
## First we do the basic predictions which are the fixed-effects ignoring random effects
elk2$naive.pred <-predict(forrisk, type = "response")
elk2$fr.rc.pred <- predict(fr.rc, type = "response")
hist(elk2$fr.rc.pred)
# note this is the same as specifying the full random effects
# elk2$fr.rc.pred3 <- predict(fr.rc, re.form = ~(ctotrisk_sc|elkuid), type = "response")
summary(elk2$fr.rc.pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.2735 0.4575 0.4956 0.7127 0.9999
## First we do the basic predictions which are the fixed-effects unconditional on the random effects (i.e., Naive logit)
elk2$fr.rc.pred2 <- predict(fr.rc, re.form = NA, type = "response")
summary(elk2$fr.rc.pred2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.3099 0.3763 0.3976 0.5421 0.9994
## But note now we can make predictions for JUST individual elk ignoring the variation between individuals in predation risk responses
elk2$fr.rc.pred3 <- predict(fr.rc, re.form = ~(1|elkuid) , type = "response")
summary(elk2$fr.rc.pred3)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.2259 0.5848 0.5184 0.7853 0.9999
hist(elk2$fr.rc.pred3)
### Explore relationships between different predictions
plot(elk2$fr.rc.pred2, elk2$fr.rc.pred)
##### This is the plot of the predictions from the unconditional predictions (X) versus the fully-specified random effects of risk|elkid (y)
ggpairs(elk2[46:49])
##### This plot compares the predictions from the full mixed-effect model (fr.rc.pred), the unconditional predictions ignoring the RE’s (fr.rc.pred2), the predictions from the model only considering the random effect of individual elk (not response to wolves, fr.rc.pred3), and the naive logistic regression results ignoring everything.
ggplot(elk2, aes(utmx, utmy, col = naive.pred)) + geom_point(size=5) + coord_equal() + scale_colour_gradient(low = 'yellow', high = 'red')
##### Now the random effects model with ctotrisk|elkuid
ggplot(elk2, aes(utmx, utmy, col = fr.rc.pred)) + geom_point(size=5) + coord_equal() + scale_colour_gradient(low = 'yellow', high = 'red')
##### Now the predictions unconditioned on any random effects
ggplot(elk2, aes(utmx, utmy, col = fr.rc.pred2)) + geom_point(size=5) + coord_equal() + scale_colour_gradient(low = 'yellow', high = 'red')
##### And finally, the spaital predictions holding effects of predation risk constant andonly considering vairation between elk (not as sensible in this example)
ggplot(elk2, aes(utmx, utmy, col = fr.rc.pred3)) + geom_point(size=5) + coord_equal() + scale_colour_gradient(low = 'yellow', high = 'red')
##### As the documentation for lme4::predict.merMod() notes:
##### There is no option for computing standard errors of predictions because it is difficult to define an efficient method that incorporates uncertainty in the variance parameters; we recommend lme4::bootMer() for this task.
#boot.fr.rc <- bootMer(fr.rc, FUN = function(x) as.numeric(logLik(x), nsim = 100))
#boot.fr.rc$mle
#### These are the MLE estimates of your beta coefficients given the model structure
##### Or in the cases where the model is too big or complex, you can extract the prediction intervals using predictInterval() from the package merTools
##### http://stats.stackexchange.com/questions/147836/prediction-interval-for-lmer-mixed-effects-model-in-r
#preds <- predictInterval(fr.rc, newdata = newDat, n.sims = 99)